# Display the AGDC architecture diagram (expects AGDC_Arch.png beside this notebook).
from IPython.display import Image
Image("AGDC_Arch.png")
AGDC Github: https://github.com/data-cube/agdc-v2
WOfS Github: https://github.com/GeoscienceAustralia/wofs
# Notebook setup: inline plotting plus the scientific-Python stack.
%matplotlib inline
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import math
import pandas
import sys
sys.path.insert(0,'/home/547/fxz547/myGithub/agdc-v2') #prepend a path so this local agdc-v2 checkout shadows any installed copy
# import AGDC Python API modules
import datacube
from datacube.storage import masking
# Create an API data access object
dc = datacube.Datacube(app='PyconDemo')
dc  # last expression in the cell: display the Datacube object's repr
We can access the Postgres Database (metadata index) and its associated imagery data.
The Datacube provides pandas.DataFrame representations of the available products and measurements:
# List all products indexed in the datacube as a pandas DataFrame.
df_prodlist = dc.list_products()
df_prodlist.head()  # preview the first few products
Each of the datacube products may have multiple measurements.
Measurements relate to the sensor's characteristics and are also known as bands in the remote-sensing domain.
Bands correspond to electromagnetic (light-wave) spectral ranges, and generally include Red-Green-Blue (RGB).
# List every measurement (band) of every product; preview the first ten rows.
df_measlist = dc.list_measurements()
df_measlist.head(10)
# To see the function signature, uncomment:
#dc.load?
# Spatial extent in degrees lon (x) / lat (y) covering Lake Burley Griffin and surrounds.
xp=(149.00, 149.20); yp=(-35.25, -35.35) # Lake Burley Griffin x=(149.07, 149.17), y=(-35.25, -35.35),
# xp=( 149.160, 149.170); yp=(-35.34 , -35.35 ) #a small area covering the GA water pond
tp=('2015-01', '2015-12-31')  # time range: calendar year 2015
# Load Landsat 8 NBAR data for the extent and time range.
nbar = dc.load(product='ls8_nbar_albers', x=xp, y=yp, time=tp)
The returned data is an xarray.Dataset object, which is a labelled n-dimensional array wrapping a numpy array.
We can investigate the data to see the variables (measurement bands) and dimensions that were returned:
nbar
We can look at the data by name directly, or through the data_vars dictionary:
nbar.data_vars  # dictionary-like access to the measurement bands
nbar.green      # attribute-style access to a single band
# All bands share the same (time, y, x) shape.
print(nbar.red.shape)
print(nbar.green.shape)
print(nbar.blue.shape)
print(nbar.swir1.shape) # short-wave infrared band
print(nbar.time.min())  # earliest acquisition in the loaded range
print(nbar.time.max())  # latest acquisition
itime=1
def show_images(nbar, itime):
    """Plot the red, green and blue bands of time slice *itime* as three grayscale subplots."""
    print ("Showing images acquired in datetime ", nbar.time.values[itime])
    # (title, colorbar label, band slice) for each of the three subplots
    panels = [
        ("Red band", 'red sensor measurement', nbar.red.isel(time=itime)),
        ("Green band", 'green sensor measurement', nbar.green.isel(time=itime)),
        ("Blue band", 'blue sensor measurement', nbar.blue.isel(time=itime)),
    ]
    plt.figure(figsize=(16, 14))
    for position, (panel_title, cbar_label, band_slice) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(band_slice, cmap='gray')
        plt.title(panel_title)
        plt.xlabel('easting')
        plt.ylabel('northing')
        plt.colorbar(orientation='vertical', shrink=0.3, label=cbar_label)
    return
# show the images of bands at itime=0,1, 10
show_images(nbar, 1)  # itime=1 is one of the cloud-free acquisitions
Each band is a grayscale image. They can be combined to make a better image.
# define a scale function to stretch an image
def scale_array(arr, prcnt, min_val, max_val, nan_val):
    """
    Linearly rescale 'arr' into the range [min_val, max_val] with a
    percentile contrast stretch.

    Values outside the central 'prcnt' percentile window are clipped
    before scaling, so a few extreme pixels cannot wash out the image.
    Pixels equal to 'nan_val' are treated as no-data: they become NaN and
    are excluded from the percentile computation.

    Parameters
    ----------
    arr : array-like
        Input image of any numeric dtype (converted to float).
    prcnt : float
        Percentage of the data kept un-clipped, centred on the median;
        e.g. 99.0 clips the bottom and top 0.5% of values.
    min_val, max_val : float
        Output range after scaling.
    nan_val : float
        Sentinel no-data value (e.g. -999) replaced with NaN.

    Returns
    -------
    numpy.ndarray of float with the same shape as 'arr'; no-data pixels
    are NaN.
    """
    f_arr = np.asarray(arr, dtype=float)  # float copy, required for NaN support
    # Fix: the original never applied 'nan_val' (the masking line was
    # commented out), so -999 nodata pixels skewed the percentile stretch.
    f_arr = np.where(f_arr == nan_val, np.nan, f_arr)
    prcnt_delta = (100 - prcnt) / 2
    clip_lim = np.nanpercentile(f_arr, (prcnt_delta, 100 - prcnt_delta))
    f_arr = np.clip(f_arr, clip_lim[0], clip_lim[1])
    # Normalise to [0, 1], then stretch to [min_val, max_val].
    f_arr = (f_arr - clip_lim[0]) / (clip_lim[1] - clip_lim[0])
    f_arr = f_arr * (max_val - min_val) + min_val
    return f_arr
def make_rgb_images(nbar, itime):
    """
    Build and display a true-colour (RGB) image from the red, green and
    blue bands of time slice *itime*.

    Each band is contrast-stretched to 0-255 with scale_array() and the
    three stretched bands are packed into a (y, x, 3) uint8 array for
    plt.imshow.
    """
    print ("RGB image acquired in datetime ", nbar.time.values[itime])
    # Extract one time slice per band.
    # (Fix: the original extracted the same three slices twice.)
    red_img = nbar.red.isel(time=itime)
    green_img = nbar.green.isel(time=itime)
    blue_img = nbar.blue.isel(time=itime)
    plt.figure( figsize=(16,12) )
    y_size = red_img.shape[0]; x_size = red_img.shape[1]
    print (y_size,x_size)
    # Stretch each band to the displayable 0-255 range, clipping the
    # darkest/brightest 0.5% of pixels; -999 is passed as the no-data
    # sentinel (assumed LS8 NBAR nodata value -- confirm in product metadata).
    sB1data = scale_array(red_img, 99.0, 0, 255, -999)
    sB2data = scale_array(green_img, 99.0, 0, 255, -999)
    sB3data = scale_array(blue_img, 99.0, 0, 255, -999)
    # Pack into an interleaved RGB byte image.
    rgb_image = np.zeros((y_size, x_size, 3), dtype='uint8')
    rgb_image[:,:,0] = sB1data
    rgb_image[:,:,1] = sB2data
    rgb_image[:,:,2] = sB3data
    plt.imshow(rgb_image, interpolation='none')
    plt.title('Landsat Image Over Canberra Region')
    plt.ylabel('northing'); plt.xlabel('easting');
# good images: 0, 1, 10
# cloudy images: itime=2,4, 5, 6,8,9
# no-data blank tiles: 3, 7
make_rgb_images(nbar, 1)  # render a cloud-free example
# Re-load, merging scenes acquired on the same solar day (overlapping passes) into one time slice.
nbar_by_solar_day = dc.load(product='ls8_nbar_albers', x=xp, y=yp, time=tp, group_by='solar_day')
len(nbar_by_solar_day.time)  # number of distinct solar days
Now we have only 42 time slices of data, fewer than the 75 found previously without solar-day-grouping
Landsat 8 revisits a given location every 16 days, so we expect at least 365/16 ≈ 22 acquisitions per year.
The 42 solar days observed over Canberra reflect the overlap between adjacent satellite passes.
Can be checked in the Clear Observation Layer at: http://eos-test.ga.gov.au/geoserver/www/remote_scripts/WOfS_v1.6.htm
# show the images of bands at itime=0,1,2,3,4,... 10
show_images(nbar_by_solar_day, 0)
# 0, 5, 8 not cloudy
# 1,2,3,4 cloudy days
make_rgb_images(nbar_by_solar_day, 2)  # render a cloudy example
# from datacube.storage import masking
# Re-load only the four bands needed for the NDVI/NDWI indices below.
bands = dc.load(product='ls8_nbar_albers', x=xp, y=yp, time=tp, measurements=['red', 'nir', 'green', 'swir1'],
group_by='solar_day')
# Replace each band's nodata sentinel with NaN so it drops out of the index arithmetic.
red = bands.red.where(bands.red != bands.red.attrs['nodata'])
nir = bands.nir.where(bands.nir != bands.nir.attrs['nodata'])
green = bands.green.where(bands.green != bands.green.attrs['nodata'])
swir1 = bands.swir1.where(bands.swir1 != bands.swir1.attrs['nodata'])
# Retrieve the associated Pixel Quality datasets
pq = dc.load(product='ls8_pq_albers', x=xp, y=yp, time=tp, group_by='solar_day')
# Boolean mask: True where both cloud tests report "no cloud" and all bands are contiguous.
cloud_free = masking.make_mask(pq, cloud_acca='no_cloud', cloud_fmask='no_cloud', contiguous=True).pixelquality
# Normalized Difference Vegetation Index: red and near-infrared bands
ndvi = ((nir - red) / (nir + red)).where(cloud_free)
# Normalized Difference Water Index: green and shortwave-infrared bands
# NOTE(review): this computes (swir1-green)/(green+swir1), the negative of the
# commonly cited MNDWI (green-swir1)/(green+swir1) shown commented out below,
# so water shows LOW values here -- confirm the intended sign convention.
ndwi = ((swir1- green)/(green + swir1)).where(cloud_free)
#ndwi = ((green- swir1)/(green + swir1)).where(cloud_free)
ndvi.shape
ndwi.shape
# skip ndvi plots
#ndvi.plot(col='time', col_wrap=6)
# xarray.Dataset.sum to reduce the datasets by selecting the time slices with high percentage cloud-free pixels
cloudfreeRatio = 0.8 # keep slices that are at least 80% cloud-free
# Per-time-slice test: True when the count of cloud-free pixels exceeds 80% of
# the pixels in one slice (cloud_free.size / cloud_free.time.size = pixels per slice).
mostly_cloud_free = cloud_free.sum(dim=('x','y')) > (cloudfreeRatio * cloud_free.size / cloud_free.time.size)
print(mostly_cloud_free)
# How many images were selected?
mostly_cloud_free.sum().values
# Apply the time-dim mask to the 3D-array (time, x, y) and drop all-NaN slices
mostly_good_ndvi = ndvi.where(mostly_cloud_free).dropna('time', how='all')
mostly_good_ndvi.plot(col='time', col_wrap=5)
# images after invalid pixels were removed
ndwi.plot(col='time', col_wrap=6)
# apply the cloud_threshold mask, which selects the subset of images with good pixels
mostly_good_ndwi = ndwi.where(mostly_cloud_free).dropna('time', how='all')
mostly_good_ndwi.plot(col='time', col_wrap=5)
# 2x2 summary figure: median/mean NDVI (top row) and median/mean NDWI (bottom row),
# each reduced over the cloud-filtered time dimension.
plt.figure( figsize=(16,12) )
plt.subplot( 2,2,1 )
mostly_good_ndvi.median(dim='time').plot()
plt.title("Median Normalised Difference Vegetation Index - NDVI"); plt.xlabel('easting'); plt.ylabel('northing')
plt.subplot( 2,2,2 )
mostly_good_ndvi.mean(dim='time').plot()
# ndvi.mean(dim='time').plot()
plt.title("Mean Normalised Difference Vegetation Index - NDVI"); plt.xlabel('easting'); plt.ylabel('northing')
#------------------------------
plt.subplot( 2,2,3 )
mostly_good_ndwi.median(dim='time').plot()
plt.title("Median Normalised Difference Water Index - NDWI"); plt.xlabel('easting'); plt.ylabel('northing')
plt.subplot( 2,2,4 )
mostly_good_ndwi.mean(dim='time').plot()
# ndwi.mean(dim='time').plot()
plt.title("Mean Normalised Difference Water Index - NDWI"); plt.xlabel('easting'); plt.ylabel('northing')
http://eos-test.ga.gov.au/geoserver/www/remote_scripts/WOfS_v1.6.htm
# Load the 'dsm1sv10' digital surface model (elevation) product over the same area, in its native grid.
grid = dc.load(product='dsm1sv10', x=(149.07, 149.17), y=(-35.25, -35.35))
grid.elevation
grid.elevation[0].plot()  # plot the first time slice
# Shell escape: inspect filesystem ACLs on the source DSM data directory.
!getfacl /g/data/v10/projects/ingest_test_data/milestone1/dsm1sv1_0_Clean/
# Re-load the same product reprojected to Australian Albers (EPSG:3577) at 25 m resolution.
albers_grid = dc.load(product='dsm1sv10', x=(149.07, 149.17), y=(-35.25, -35.35),
output_crs='EPSG:3577', resolution=(-25,25))
albers_grid.elevation.shape
albers_grid.elevation[0].plot()